{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "CLIP Guided Images Mixing With Stable Diffusion\n",
    "\n",
    "CLIP guided stable diffusion images mixing pipeline allows to combine two images using standard diffusion models. This approach is using (optional) CoCa model to avoid writing image description. This script was contributed by [Karachev Denis](https://github.com/TheDenk) and notebook by [Parag Ekbote](https://github.com/ParagEkbote)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: torch in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (2.2.1+cu121)\n",
      "Requirement already satisfied: matplotlib in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (3.8.2)\n",
      "Requirement already satisfied: Pillow in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (11.1.0)\n",
      "Requirement already satisfied: diffusers in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (0.32.2)\n",
      "Requirement already satisfied: transformers in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (4.48.3)\n",
      "Collecting open_clip_torch\n",
      "  Downloading open_clip_torch-2.30.0-py3-none-any.whl.metadata (31 kB)\n",
      "Requirement already satisfied: filelock in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (3.17.0)\n",
      "Requirement already satisfied: typing-extensions>=4.8.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (4.12.2)\n",
      "Requirement already satisfied: sympy in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (1.13.3)\n",
      "Requirement already satisfied: networkx in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (3.4.2)\n",
      "Requirement already satisfied: jinja2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (3.1.5)\n",
      "Requirement already satisfied: fsspec in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (2025.2.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (2.19.3)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: triton==2.2.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from torch) (2.2.0)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.8.61)\n",
      "Requirement already satisfied: contourpy>=1.0.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (1.3.1)\n",
      "Requirement already satisfied: cycler>=0.10 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (0.12.1)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (4.55.8)\n",
      "Requirement already satisfied: kiwisolver>=1.3.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (1.4.8)\n",
      "Requirement already satisfied: numpy<2,>=1.21 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (24.2)\n",
      "Requirement already satisfied: pyparsing>=2.3.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (3.2.1)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from matplotlib) (2.9.0.post0)\n",
      "Requirement already satisfied: importlib-metadata in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from diffusers) (8.6.1)\n",
      "Requirement already satisfied: huggingface-hub>=0.23.2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from diffusers) (0.28.1)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from diffusers) (2024.11.6)\n",
      "Requirement already satisfied: requests in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from diffusers) (2.32.3)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from diffusers) (0.5.2)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (6.0.2)\n",
      "Requirement already satisfied: tokenizers<0.22,>=0.21 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (0.21.0)\n",
      "Requirement already satisfied: tqdm>=4.27 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from transformers) (4.67.1)\n",
      "Requirement already satisfied: torchvision in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from open_clip_torch) (0.17.1+cu121)\n",
      "Collecting ftfy (from open_clip_torch)\n",
      "  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)\n",
      "Collecting timm (from open_clip_torch)\n",
      "  Downloading timm-1.0.14-py3-none-any.whl.metadata (50 kB)\n",
      "Requirement already satisfied: six>=1.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
      "Requirement already satisfied: wcwidth in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from ftfy->open_clip_torch) (0.2.13)\n",
      "Requirement already satisfied: zipp>=3.20 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from importlib-metadata->diffusers) (3.21.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from jinja2->torch) (3.0.2)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->diffusers) (3.4.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->diffusers) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->diffusers) (2.3.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from requests->diffusers) (2025.1.31)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages (from sympy->torch) (1.3.0)\n",
      "Downloading open_clip_torch-2.30.0-py3-none-any.whl (1.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m127.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading ftfy-6.3.1-py3-none-any.whl (44 kB)\n",
      "Downloading timm-1.0.14-py3-none-any.whl (2.4 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m141.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: ftfy, timm, open_clip_torch\n",
      "Successfully installed ftfy-6.3.1 open_clip_torch-2.30.0 timm-1.0.14\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install torch matplotlib Pillow diffusers transformers open_clip_torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2e1321ad982c4c2c81a5a66da4a31648",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1caa94edbec445f498907b8e3b56ecac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/32 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image saved successfully at mixed_output.jpg\n"
     ]
    }
   ],
   "source": [
    "import PIL\n",
    "import torch\n",
    "import requests\n",
    "import open_clip\n",
    "from open_clip import SimpleTokenizer\n",
    "from io import BytesIO\n",
    "from diffusers import DiffusionPipeline\n",
    "from transformers import CLIPImageProcessor, CLIPModel\n",
    "\n",
    "\n",
    "def download_image(url):\n",
    "    response = requests.get(url)\n",
    "    return PIL.Image.open(BytesIO(response.content)).convert(\"RGB\")\n",
    "\n",
    "# Loading additional models\n",
    "feature_extractor = CLIPImageProcessor.from_pretrained(\n",
    "    \"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\"\n",
    ")\n",
    "clip_model = CLIPModel.from_pretrained(\n",
    "    \"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", torch_dtype=torch.float16\n",
    ")\n",
    "coca_model = open_clip.create_model('coca_ViT-L-14', pretrained='laion2B-s13B-b90k').to('cuda')\n",
    "coca_model.dtype = torch.float16\n",
    "coca_transform = open_clip.image_transform(\n",
    "    coca_model.visual.image_size,\n",
    "    is_train=False,\n",
    "    mean=getattr(coca_model.visual, 'image_mean', None),\n",
    "    std=getattr(coca_model.visual, 'image_std', None),\n",
    ")\n",
    "coca_tokenizer = SimpleTokenizer()\n",
    "\n",
    "# Pipeline creating\n",
    "mixing_pipeline = DiffusionPipeline.from_pretrained(\n",
    "    \"CompVis/stable-diffusion-v1-4\",\n",
    "    custom_pipeline=\"clip_guided_images_mixing_stable_diffusion\",\n",
    "    clip_model=clip_model,\n",
    "    feature_extractor=feature_extractor,\n",
    "    coca_model=coca_model,\n",
    "    coca_tokenizer=coca_tokenizer,\n",
    "    coca_transform=coca_transform,\n",
    "    torch_dtype=torch.float16,\n",
    ")\n",
    "mixing_pipeline.enable_attention_slicing()\n",
    "mixing_pipeline = mixing_pipeline.to(\"cuda\")\n",
    "\n",
    "# Pipeline running\n",
    "generator = torch.Generator(device=\"cuda\").manual_seed(17)\n",
    "\n",
    "def download_image(url):\n",
    "    response = requests.get(url)\n",
    "    return PIL.Image.open(BytesIO(response.content)).convert(\"RGB\")\n",
    "\n",
    "content_image = download_image(\"https://huggingface.co/datasets/TheDenk/images_mixing/resolve/main/boromir.jpg\")\n",
    "style_image = download_image(\"https://huggingface.co/datasets/TheDenk/images_mixing/resolve/main/gigachad.jpg\")\n",
    "\n",
    "pipe_images = mixing_pipeline(\n",
    "    num_inference_steps=50,\n",
    "    content_image=content_image,\n",
    "    style_image=style_image,\n",
    "    noise_strength=0.65,\n",
    "    slerp_latent_style_strength=0.9,\n",
    "    slerp_prompt_style_strength=0.1,\n",
    "    slerp_clip_image_style_strength=0.1,\n",
    "    guidance_scale=9.0,\n",
    "    batch_size=1,\n",
    "    clip_guidance_scale=100,\n",
    "    generator=generator,\n",
    ").images\n",
    "\n",
    "output_path = \"mixed_output.jpg\"\n",
    "pipe_images[0].save(output_path)\n",
    "print(f\"Image saved successfully at {output_path}\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
